**********************************************************
*** Regression ***
**********************************************************

* Total gambling outlay: lottery spending plus other gambling spending
gen gambling_total = lottery_exp + gambling_exp

* Spending indicators with suffix _dum: 0 when the amount is exactly zero,
* 1 when it is positive and not equal to system missing (.).
* Negative or system-missing amounts leave the indicator missing.
local dummy_cats gambling_total fuel hs books holiday
foreach cat of local dummy_cats {
    gen `cat'_dum = (`cat' > 0) if `cat' >= 0 & `cat' != .
}

* Attach the debit/credit co-holding file on the key pair (ind_id, edate).
* A 1:1 merge requires that this pair uniquely identifies observations in
* both the data in memory and the using file. It does NOT require that every
* observation finds a partner: unmatched observations from either side are
* retained, so the expected perfect match is not enforced by this command.
* See the Stata documentation for merge.
merge 1:1 ind_id edate using "deb_cred_all_coholding.dta"

* Discard observations whose cash value is system missing. Whether the
* training set or the full sample is active, only individuals with
* non-missing cash remain afterwards.
drop if cash == .

*Generate credit/debit-card-related variables

* Spending categories entering the overall totals, listed in summation order
local card_cats groc alc rmf fuel hi hs transp sa ca pharm recr ///
    lott_exp lott_char_exp gambling_exp game ///
    books craft edu fish recr_area ///
    spec ts swim toys char media

* total_cred and total_deb: left-to-right sum of every category variable
* carrying the respective suffix. The sum is missing if any term is missing.
foreach side in cred deb {
    local card_sum
    foreach cat of local card_cats {
        if "`card_sum'" == "" {
            local card_sum `cat'_`side'
        }
        else {
            local card_sum `card_sum' + `cat'_`side'
        }
    }
    gen total_`side' = `card_sum'
}

* Durable, non-durable and temptation sub-totals for each suffix
foreach side in cred deb {
    gen dur_`side' = hi_`side' + ca_`side' + transp_`side'
    gen nondur_`side' = groc_`side' + fuel_`side' + rmf_`side' + ///
        pharm_`side' + recr_`side' + char_`side' + sa_`side'
    gen tempt_`side' = alc_`side' + ///
        (lott_exp_`side' + lott_char_exp_`side') + ///
        gambling_exp_`side' + game_`side'
}

* Credit share per category: cr_X = X_cred / (X_cred + X_deb).
* The list covers the individual categories plus the dur, nondur and tempt
* aggregates. A zero denominator yields a missing value.
local share_cats groc alc rmf fuel hi hs transp ///
    sa ca pharm recr lott_exp lott_char_exp gambling_exp ///
    game books craft edu fish recr_area spec ///
    ts swim toys char media dur nondur tempt
foreach cat of local share_cats {
    gen cr_`cat' = `cat'_cred / (`cat'_cred + `cat'_deb)
}

* Overall credit share of total spending
gen cred_ratio = total_cred / (total_cred + total_deb)

* Regression outcome: copy of pr_cohold_start in which a start flag is reset
* to 0 whenever the next observation of the SAME individual is also flagged,
* so that only the last flag of a run of consecutive flags is kept.
sort ind_id edate
gen pr_cohold_start_reg = pr_cohold_start
* The look-ahead [_n+1] is evaluated within ind_id. Without the by prefix the
* last observation of one individual would be compared with the first
* observation of the next individual in the sort order. Inside a by-group a
* subscript past the final observation returns missing, so the last
* observation of each individual is never reset.
by ind_id (edate): replace pr_cohold_start_reg = 0 if ///
	pr_cohold_start_reg[_n]==1 & pr_cohold_start_reg[_n+1]==1

* Inverse hyperbolic sine transform, ln(x + (x^2 + 1)^0.5), of the income and
* spending totals. Results are stored under the same name with prefix t_.
foreach money in total_income total_cred total_deb {
    gen t_`money' = ln(`money' + (`money'^2 + 1)^0.5)
}

* Outcome variants by co-holding degree: each equals pr_cohold_start_reg on
* observations whose CoholdDayDegree matches the given code and 0 everywhere
* else (including observations where CoholdDayDegree is missing).
local degree_light 2
local degree_intense 3
foreach kind in light intense {
    gen pr_cohold_start_`kind'_reg = ///
        cond(CoholdDayDegree != `degree_`kind'', 0, pr_cohold_start_reg)
}



******************************************************************
*Table 6: OLS Regression: Probability of Co-hold Period Starting
******************************************************************
* All columns use observations with coholder == 1 and robust standard errors.

* Controls shared by every column
local ols_base day_of_week ind_emy_seq ///
    age female alwaysin2memberhh benefits_pers t_total_income i.area_id

* Column-specific regressor groups
local ols_payday salary_day
local ols_spend dur_exp nondur_exp
local ols_accts ca_count sa_count
local ols_dums gambling_total_dum tempt_dum fuel_dum hs_dum books_dum holiday_dum

* a: payday indicator
eststo a: regress pr_cohold_start_reg `ols_base' `ols_payday' ///
    if coholder == 1, vce(robust)

* b: durable / non-durable spending and account counts
eststo b: regress pr_cohold_start_reg `ols_base' `ols_spend' `ols_accts' ///
    if coholder == 1, vce(robust)

* c: transformed total_deb
eststo c: regress pr_cohold_start_reg `ols_base' t_total_deb ///
    if coholder == 1, vce(robust)

* d: transformed total_cred
eststo d: regress pr_cohold_start_reg `ols_base' t_total_cred ///
    if coholder == 1, vce(robust)

* e: spending-category indicators
eststo e: regress pr_cohold_start_reg `ols_base' `ols_dums' ///
    if coholder == 1, vce(robust)

* f: shared controls only
eststo f: regress pr_cohold_start_reg `ols_base' ///
    if coholder == 1, vce(robust)

* g: all regressor groups from columns a to e together
eststo g: regress pr_cohold_start_reg `ols_base' ///
    `ols_payday' `ols_spend' `ols_accts' ///
    t_total_deb t_total_cred `ols_dums' ///
    if coholder == 1, vce(robust)

* Export columns a-g to Tables/ols_probability.tex (booktabs LaTeX).
* Cells show the coefficient with significance stars and the standard error
* in parentheses, both with 4 decimals. The area dummies, day_of_week and
* ind_emy_seq are estimated but not printed. substitute() replaces every
* semicolon in the output with a comma.
esttab a b c d e f g using "Tables/ols_probability.tex", replace se ///
booktabs ///
cells(b(fmt(4) star) se(par fmt(4))) ///
collabels(none) ///
mlabels(none) ///
mgroups("Probability of cohold period starting", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
starlevels(* 0.10 ** 0.05 *** 0.01) /// 
stats(r2 N, fmt(%9.3f %15.0fc) ///
	label(R-square \#Observations)) ///
varlabels( ///
age "Age" female "Female" alwaysin2memberhh "Linked" ///
benefits_pers "Benefits person" t_total_income "Log total income" ///
salary_day "Payday$^1$" ///
dur_exp "Durables$^2$" nondur_exp "Non-durables$^2$" ///
ca_count "Nr. current accounts" sa_count "Nr. savings accounts" ///
t_total_deb "Log cash spendings" ///
t_total_cred "Log credit card spendings" ///
gambling_total_dum "Gambling$^3$" ///
tempt_dum "Temptations$^3$" fuel_dum "Fuel$^3$" hs_dum "Home securities$^3$" ///
books_dum "Books$^3$" holiday_dum "Holidays$^3$" ///
_cons "Constant" ///
) ///
drop(*area* day_of_week ind_emy_seq) ///
substitute(; ,)



******************************************************************
*Table 7: Individual Fixed Effects Regression: Probability of Co-hold Period Starting
******************************************************************
* Declare the panel: new_ind_id is the panel variable, edate_id the time variable
tsset new_ind_id edate_id

* Controls shared by EVERY column. Columns e and g used to list
* t_total_income i.area_id without i.emy, so they were estimated with a
* different set of fixed effects than columns a, b, c, d and f. Keeping the
* list in one local guarantees that all columns of the table are comparable.
local fe_base day_of_week ind_emy_seq t_total_income i.emy i.area_id
local fe_dums gambling_total_dum tempt_dum fuel_dum hs_dum books_dum holiday_dum

* Every column: within (fe) estimator on observations with coholder == 1,
* standard errors clustered on new_ind_id. After each fit the number of
* clusters e(N_clust) is stored, pre-formatted, in e(Ind) for the table footer.

* a: payday indicator
eststo a: xtreg pr_cohold_start_reg `fe_base' ///
    salary_day ///
    if coholder ==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* b: durable / non-durable spending and account counts
eststo b: xtreg pr_cohold_start_reg `fe_base' ///
    dur_exp nondur_exp ///
    ca_count sa_count ///
    if coholder==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* c: transformed total_deb
eststo c: xtreg pr_cohold_start_reg `fe_base' ///
    t_total_deb ///
    if coholder==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* d: transformed total_cred
eststo d: xtreg pr_cohold_start_reg `fe_base' ///
    t_total_cred ///
    if coholder==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* e: spending-category indicators
eststo e: xtreg pr_cohold_start_reg `fe_base' ///
    `fe_dums' ///
    if coholder ==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* f: shared controls only
eststo f: xtreg pr_cohold_start_reg `fe_base' ///
    if coholder ==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"

* g: all regressor groups from columns a to e together
eststo g: xtreg pr_cohold_start_reg `fe_base' ///
    salary_day ///
    dur_exp nondur_exp ///
    ca_count sa_count ///
    t_total_deb ///
    t_total_cred ///
    `fe_dums' ///
    if coholder ==1, fe cluster(new_ind_id)
estadd local Ind = "`: di %15.0fc `=e(N_clust)''"


* Export columns a-g to Tables/fixed_effects.tex (booktabs LaTeX).
* Cells show the coefficient with significance stars and the standard error
* in parentheses, both with 4 decimals. The footer reports e(r2), e(N) and
* the stored cluster count e(Ind). Coefficients whose names match *area* or
* *emy*, plus day_of_week and ind_emy_seq, are estimated but not printed.
* substitute() replaces every semicolon in the output with a comma.
esttab a b c d e f g using "Tables/fixed_effects.tex", replace se ///
booktabs ///
cells(b(fmt(4) star) se(par fmt(4))) ///
collabels(none) ///
mlabels(none) ///
mgroups("Probability of cohold period starting", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
starlevels(* 0.10 ** 0.05 *** 0.01) /// 
stats(r2 N Ind, fmt(%9.3f %15.0fc %15.0fc) ///
	label("R-square" "\#Observations" "\#Individuals")) ///
varlabels( ///
age "Age" female "Female" alwaysin2memberhh "Linked" ///
benefits_pers "Benefits person" t_total_income "Log total income" ///
salary_day "Payday$^1$" ///
dur_exp "Durables$^2$" nondur_exp "Non-durables$^2$" ///
ca_count "Nr. current accounts" sa_count "Nr. savings accounts" ///
t_total_deb "Log cash spendings" ///
t_total_cred "Log credit card spendings" ///
gambling_total_dum "Gambling$^3$" ///
tempt_dum "Temptations$^3$" fuel_dum "Fuel$^3$" hs_dum "Home securities$^3$" ///
books_dum "Books$^3$" holiday_dum "Holidays$^3$" ///
_cons "Constant" ///
) ///
drop(*area* *emy* day_of_week ind_emy_seq) ///
substitute(; ,)


**********************************************************
* Table A1: Ordinary Least Squares Estimates for Duration vs Frequency of Co-Holding
**********************************************************

* Binned scatter (20 quantile bins, quadratic fit line) of
* indmean_cohold_length against nr_spells, using only the observations
* with edate_id == 1335.
binscatter indmean_cohold_length nr_spells if edate_id == 1335, ///
    nquantiles(20) line(qfit) ///
    mcolors(blue) lcolor(blue) ///
    title("Duration of coholding (in days)", place(left) size(large)) ///
    xtitle("Number of coholding spells", size(large)) ///
    ytitle("") ///
    xlabel(, labsize(medium)) ///
    ylabel(, labsize(medium) angle(0))

* Start from an empty set of stored estimates, then fit three nested OLS
* models on the same edate_id == 1335 cross-section with robust standard errors.
eststo clear
local a1_base day_of_week ind_emy_seq ///
    age female alwaysin2memberhh benefits_pers t_total_income i.area_id

* a: bivariate relationship
eststo a: regress indmean_cohold_length nr_spells ///
    if edate_id == 1335, vce(robust)

* b: adds the base controls and the payday indicator
eststo b: regress indmean_cohold_length nr_spells `a1_base' salary_day ///
    if edate_id == 1335, vce(robust)

* c: adds spending, account, category-indicator and login regressors
eststo c: regress indmean_cohold_length nr_spells `a1_base' ///
    salary_day dur_exp nondur_exp ca_count sa_count t_total_deb ///
    t_total_cred tlottery_dum gambling_dum tempt_dum alc_dum ///
    total_logins ///
    if edate_id == 1335, vce(robust)

* Export columns a-c to Reg2.tex (booktabs LaTeX) in the working directory.
* Cells show the coefficient with significance stars and the standard error
* in parentheses, both with 4 decimals. The area dummies, day_of_week and
* ind_emy_seq are estimated but not printed. substitute() replaces every
* semicolon in the output with a comma.
esttab a b c  using "Reg2.tex", replace se ///
booktabs ///
cells(b(fmt(4) star) se(par fmt(4))) ///
collabels(none) ///
mlabels(none) ///
mgroups("Spell Length", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
starlevels(* 0.10 ** 0.05 *** 0.01) /// 
stats(r2 N, fmt(%9.3f %15.0fc) ///
	label(R-square \#Observations)) ///
varlabels( ///
nr_spells "Number of Spells" ///
age "Age" female "Female" alwaysin2memberhh "Linked" ///
benefits_pers "Benefits person" t_total_income "Log total income" ///
salary_day "Payday$^1$" ///
dur_exp "Durables$^2$" nondur_exp "Non-durables$^2$" ///
ca_count "Nr. current accounts" sa_count "Nr. savings accounts" ///
t_total_deb "Log cash spendings" ///
t_total_cred "Log credit card spendings" ///
tlottery_dum "Lottery$^3$" gambling_dum "Gambling$^3$" ///
tempt_dum "Temptations$^3$" alc_dum "Alcohol$^3$" ///
total_logins "Logins$^4$" ///
_cons "Constant" ///
) ///
drop(*area* day_of_week ind_emy_seq) ///
substitute(; ,)
